## Copyright (C) 2017 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{pval}, @var{W}] = } @
## shapiro_wilk_test (@var{x})
##
## Test the hypothesis that @var{x} is from a normal distribution
## using the Shapiro-Wilk test. If the returned @var{pval} is small, reject
## the hypothesis at the @var{pval}*100% level.
##
## The test statistic @var{W} is based on the correlation between the order statistics of the data
## with those expected from a normal distribution with the same mean.
##
## References: @*
## Patrick Royston (1992), "Approximating the Shapiro-Wilk W-test for non-normality", Statistics and Computing, 2(3): 117-119 @*
## Patrick Royston (1995), "AS R94: a remark on Algorithm AS 181: the W-test for normality", Journal of the Royal Statistical Society. Series C (Applied Statistics), 44(4): 547-551.
##
## @end deftypefn

## Author: Nir Krakauer <mail@nirkrakauer.net>


function [pval, W] = SWtest (x)

#the W statistic is sum(A .* x).^2 / sumsq(center(x))
#where A is a vector of expected values of order statistics from a standard normal distribution
#once x is ordered, A is an antisymmetric vector, with A(n+1-i) = -A(i), 1 <= i <= n, and A'*A = 1
#because of this, only half of A needs to be calculated; we consider a = A(n:-1:(n+1-floor(n/2)))

n = numel (x);
nh = floor (n/2);

if n < 3
  error ('The input vector x should have at least 3 elements')
endif

if n == 3
  a = sqrt(0.5); #exact
else
  #get an initial estimate of a
  m = -norminv(((1:nh)' - 0.375) / (n + 0.25));
  msq = 2*sumsq(m);
  mrms = sqrt (msq);
  #correction factors for initial elements of a (derived to be good approximations for 4 <= n <= 1000)
  rsn = 1 / sqrt(n);
  ac1 = m(1)/mrms + polyval ([-2.706056 4.434685 -2.07119 -0.147981 0.221157 0], rsn);
  if n <= 5
    phi = (msq - 2*m(1)^2) / (1 - 2*ac1^2);
    a = [ac1; m(2:nh)/sqrt(phi)];
  else
    ac2 = m(2)/mrms + polyval ([-3.582633 5.682633 -1.752461 -0.293762 0.042981 0], rsn);
    phi = (msq - 2*m(1)^2 - 2*m(2)^2) / (1 - 2*ac1^2 - 2*ac2^2);
    a = [ac1; ac2; m(3:nh)/sqrt(phi)];
  endif
endif

x = sort (x(:));
xm = mean (x);
W = sum(a .* (x(n:-1:n+1-nh) - x(1:nh))).^2 / sumsq(x - xm);

#p value for the W statistic being as small as it is for a normal distribution
if n == 3
  pval = (6/pi) * (asin(sqrt(W)) - asin(sqrt(0.75)));
elseif n <= 11
  gamma = 0.459*n - 2.273;
  w = -log(gamma - log(1 - W));
  mu = polyval ([-0.0006714 0.025054 -0.39978 0.5440], gamma);
  sigma = exp (polyval([-0.0020322 0.062767 -0.77857 1.3822], n));
  pval = normcdf ((mu - w) / sigma);
else
  nl = log (n);
  w = log (1 - W);
  mu = polyval ([0.0038915 -0.083751 -0.31082 -1.5861], nl);
  sigma = exp (polyval ([0.0030302 -0.082676 -0.4803], nl));
  pval = normcdf ((mu - w) / sigma);
endif
